home *** CD-ROM | disk | FTP | other *** search
Text File | 1993-04-09 | 3.6 KB | 132 lines | [TEXT/????] |
- #$LongestLines: print list of longest lines in a document
- #Variables:
- # how_many=number of long lines to track
- # (default 8 longest)
- # spaces_in_tab=the number of spaces in a tab
- # (default 4 spaces)
- # acceptable_length=length below which line is
- # ignored here (default 40 characters)
-
- #Use with any input option, sends results to stdout.
-
- #A tab advances the position by 1 to spaces_in_tab spaces (never 0),
- #depending on where it occurs: with the position on the line counted
- #from 0 up, hitting a tab moves to the next tab “stop” with
- # stop = int(position/spaces_in_tab) + 1
- # position = stop * spaces_in_tab
- # or position = (int(position/spaces_in_tab) + 1) * spaces_in_tab
-
- # User’s Manual references:
- # «hAWK User’s Manual» «F Running hAWK programs»
- # «hAWK User’s Manual» «L 5 Regular expressions»
- # «hAWK User’s Manual» «M 5 Built-in string and file functions»
- # «hAWK User’s Manual» «K 4 Built-in variables»
- # «hAWK User’s Manual» «K 8 Arrays»
- # «hAWK User’s Manual» «N User-defined functions»
- # «hAWK User’s Manual» «P 3 The getline function»
- # «hAWK User’s Manual» «O 3 Output into files»
- # «hAWK User’s Manual» «Q The hAWK function»
-
- # Fill in defaults for any option the user did not supply and build the
- # name of the file that receives progress messages in concurrent mode.
- BEGIN {
-     # A tab width below 1 (0, negative, or non-numeric) would make the
-     # tab-stop arithmetic int(position/spaces_in_tab) divide by zero or
-     # move backwards, so it is treated the same as a missing value.
-     if (spaces_in_tab == "" || spaces_in_tab + 0 < 1)
-         spaces_in_tab = 4
-     if (how_many == "")
-         how_many = 8
-     if (acceptable_length == "")
-         acceptable_length = 40
-     progressFile = STDPATH "$tempProgress"
- }
-
- # On the first record of each input file: report the longest lines that
- # were recorded for the file just finished (if any), then print the
- # heading for the file that is starting.
- FNR == 1 {
-     # Count the recorded lines directly. Testing line[1] would only notice
-     # a long FIRST line, and the reference itself would create an empty
-     # line[1] element.
-     recorded = 0
-     for (k in line)
-         if (line[k] != "")
-             ++recorded
-     if (recorded > 0)
-     {
-         if (!progress("sorting..."))
-         {   # concurrent mode, print progress to file
-             print "sorting..." > progressFile
-             close(progressFile)
-         }
-         print "line", "\t\t\t", "length (if tabs were converted to spaces)"
-         # sort() fills ind[] with the subscripts of line[] in reverse
-         # numeric order of length and returns how many there are.
-         max = sort(line, ind, "rn")
-         for (i = 1; i <= how_many && i <= max; ++i)
-             print ind[i], "\t\t\t", line[ind[i]]
-         print ""
-         # Forget every recorded line, not only the ones just printed, so
-         # leftovers cannot appear in the next file's report.
-         split("", line)
-     }
-     # The last ":"-separated component of the path is the bare file name.
-     z = split(FILENAME, names, ":")
-     ++numFiles
-     print names[z], "longest lines are:"
- }
-
- {
-     # Measuring is slow, so report progress every 100 records.
-     if (FNR % 100 == 0)
-     {
-         if (!progress(names[z] FNR))
-         {   # concurrent mode, print progress to file
-             print names[z], FNR > progressFile
-             close(progressFile)
-         }
-     }
-
-     # Character-by-character scanning is expensive in hAWK, so the two
-     # common shapes are handled with string functions: a run of tabs at
-     # the start of the line (each fills one whole tab stop) and a
-     # remainder that contains no tab at all (each character is one column).
-     leadTabs = 0
-     if (match($0, /^\t+/))
-         leadTabs = RLENGTH
-     position = leadTabs * spaces_in_tab
-     rest = substr($0, leadTabs + 1)
-     restLen = length(rest)
-
-     if (index(rest, "\t") == 0)
-         position += restLen
-     else
-     {
-         # Tabs remain after the leading run: walk the remainder and jump
-         # to the next tab stop whenever one is met.
-         for (col = 1; col <= restLen; ++col)
-         {
-             if (substr(rest, col, 1) == "\t")
-                 position = (int(position/spaces_in_tab) + 1) * spaces_in_tab
-             else
-                 ++position
-         }
-     }
-
-     # Remember only lines longer than the threshold, keyed by line number.
-     if (position > acceptable_length)
-         line[FNR] = position
- }
-
- # After the last record: report the longest lines of the final input file.
- END {
-     if (!progress("sorting..."))
-     {   # concurrent mode, print progress to file
-         print "sorting..." > progressFile
-         close(progressFile)
-     }
-     # Count the entries that actually hold a length. Testing line[1] would
-     # only notice a long FIRST line and suppress the report otherwise.
-     recorded = 0
-     for (k in line)
-         if (line[k] != "")
-             ++recorded
-     if (recorded > 0)
-     {
-         print "line", "\t\t\t", "length (if tabs were converted to spaces)"
-         # sort() fills ind[] with the subscripts of line[] in reverse
-         # numeric order of length and returns how many there are.
-         max = sort(line, ind, "rn")
-         for (i = 1; i <= how_many && i <= max; ++i)
-         {
-             # Skip an empty placeholder entry (one can be created merely
-             # by referencing line[1]); it carries no length to report.
-             if (line[ind[i]] != "")
-                 print ind[i], "\t\t\t", line[ind[i]]
-         }
-         print ""
-     }
- }
-